rm(list = ls())

#B1BFF00240DAC86C

library(tidyverse)
library(stargazer)
library(lfe)
library(broom) #for tidy
library(lmtest) # For coeftest()
library(sandwich) # For sandwich()
library(estimatr)
library(texreg)
library(psych)
library(default)
library(fixest)
library(corrplot)
library(scales)

options(tibble.print_min = 10)
source("functions.R")

load(file = 'data_output/persons_years_sample.Rdata')
load(file = 'data_output/persons_sample.Rdata')


# Earnings histrograms ----------------------------------------------------


p1 = full_sample %>% ggplot(aes(inc_labor)) + geom_histogram(bins = 100) +
  theme_bw() + 
  labs(x = "Yearly Earnings in 2015 euros",
       y = "Count")+
  scale_x_continuous(breaks = pretty_breaks(), labels = label_comma(), limits = c(-2000,275000)) +
  scale_y_continuous(breaks = pretty_breaks(), labels = label_comma())

ggsave("/figures/hist_inc_labor.pdf", height = 4, width = 7)

p2 = full_sample %>% ggplot(aes(inc_log)) + geom_histogram(bins = 1000) +
  theme_bw() + 
  labs(x = "log(Yearly Earnings)",
       y = "Count")
ggsave("/figures/hist_inc_log.pdf", height = 4, width = 7)

# corrplot ----------------------------------------------------------------

plot_dta = persons %>% 
  select(visuos, math, words, social, energy, delib, duty, achieve, leader, conf, educ_y, gpa_t, inc_labor)
names(plot_dta) = 
  c("Visuospatial", "Math", "Verbal", "Social", "Activity", "Deliberation", "Dutifullness",
      "Achievement", "Leadership", "Confidence", "Educ. Years", "School GPA", "Earnings at 39")

p1 = corrplot(
  cor(plot_dta, use = "pairwise.complete.obs"),
  method = "ellipse",
  #upper = "ellipse", 
  #lower = "number",
  addCoef.col = "black",
  tl.col = "firebrick",
  #addrect = 4,
  order = "hclust")

p1 = p1 %>% recordPlot
ggsave(file  = "/figures/correlations.pdf", plot = replayPlot(p1), width = 12, height = 10)


# Trends in earnings and employment -----------------------------------------


labor_trends = full_sample %>% 
  group_by(vuosi) %>% 
  summarise(Earnings = mean(inc_labor, na.rm = T),
            Employment = mean(emp_11, na.rm = T),
            Extensive = mean(extensive, na.rm = T))
  
labor_trends %>% 
  ggplot() +
  geom_line(aes(vuosi, Earnings, color = "Earnings", linetype = "Earnings")) +
  geom_line(aes(vuosi, Employment*40000, color = "Employment", linetype = "Employment")) +
  #geom_point(aes(vuosi, value, color = name, shape = name)) +
  scale_color_manual(name = "",
                     values = c("Employment" = "firebrick", "Earnings" = "steelblue")) +
  scale_linetype_manual(name = "",
                        values = c("Earnings" = "solid", "Employment" = "dashed")) +
  theme_bw() +
  scale_y_continuous(sec.axis = sec_axis(~./40000 , name = "Employment")) + 
  scale_x_continuous(breaks = 2000:2015, labels = paste0("'", str_sub(2000:2015, 3, 4))) +
  theme(axis.title.y = element_text(color = "steelblue"),
        axis.title.y.right = element_text(color = "firebrick"),
        axis.text.y = element_text(color = "steelblue"),
        axis.text.y.right = element_text(color = "firebrick"),
        legend.position = c(0.8,0.6)) +
  labs(x = "Year",
       y = "Mean Earnings (2015 euros)")

ggsave(file  = "/figures/labor_trends.pdf", width = 6, height = 3)



# How traits predict HS math over time ------------------------------------------------
# 
# nest_mincer = persons %>% 
#   #filter(inc_prop > 8) %>% 
#   group_by(synvuosi) %>% nest %>% 
#   mutate(mincer1 = map(data, ~lm_robust(math_anchored ~ cog + extro + consci, data = .)),
#          tidied1 = map(mincer1, tidy))
# 
# nest_mincer_unnest = nest_mincer %>% 
#   select(tidied1) %>% 
#   unnest(cols = c(tidied1)) %>% ungroup %>% 
#   filter(term %in% c("cog", "extro", "consci"))
# 
# nest_mincer_unnest %>% 
#   ggplot(aes(x = synvuosi, y = estimate, color = as.factor(term), shape = as.factor(term), linetype = as.factor(term))) +
#   geom_point(size = 3) +
#   geom_line(size = 0.6) +
#   geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = as.factor(term)), alpha = 0.1, color = NA)  +
#   theme_bw() +
#   scale_color_manual(name = "Coefficient", labels = var_names, values = color_types) +
#   scale_fill_manual(name = "Coefficient", labels = var_names, values = color_types) +
#   scale_shape_manual(name = "Coefficient", labels = var_names, values = point_types) +
#   scale_linetype_manual(name = "Coefficient", labels = var_names, values = line_types) +
#   labs(x = "Cohort",
#        y = "HS mathEstimate") +
#   scale_y_continuous(breaks = pretty) +
#   #scale_x_continuous(breaks = 2000:2015, labels = paste0("'", str_sub(2000:2015, 3, 4))) +
#   guides(fill = FALSE)


